### NYC Public Employees Minimum Wage Voter Turnout
### This file replicates our main analysis, which focuses on the turnout of municipal employees in NYC 
### using the voter file merge laid out in our main paper
### plus some robustness tests from the SI

#load some packages
library(tidyverse)
library(data.table)
library(feather)
library(sandwich)
library(lmtest)
library(ggplot2)
library(ggthemes)
library(stargazer)
library(plm) 
library(xtable)
library(lfe)
library(broom)

# Number of bootstrap draws.
# NOTE(review): n_boot is not referenced anywhere else in the code visible
# here -- confirm whether it is still needed.
n_boot <- 100

# Read the voters-and-employees file and replace the employee key with the
# integer codes of its factor levels (stored as numeric).
df <- data.table(read_feather("Data/NY_Voters_and_Employees.feather"))
df$employeekey <- as.numeric(as.factor(df$employeekey))
everyone <- df

# Lowest and highest base salary across 2011-2014 within each
# employee x pay-basis group, ignoring missing values.
everyone[, minsalary := min(
  BaseSalary.2011, BaseSalary.2012, BaseSalary.2013, BaseSalary.2014,
  na.rm = TRUE
), by = list(employeekey, PayBasis)]
everyone[, MaxSalary := max(
  BaseSalary.2011, BaseSalary.2012, BaseSalary.2013, BaseSalary.2014,
  na.rm = TRUE
), by = list(employeekey, PayBasis)]

# We only want people that earned at/above the prevailing minimum wage
# (not foster grandparent positions, etc.), so drop groups whose lowest
# salary is below 7.25.
everyone <- everyone[!(minsalary < 7.25), ]

## add the undermin variable
# Anyone with at least one observed 2011-2014 salary starts as FALSE ...
everyone[
  !is.na(BaseSalary.2011) | !is.na(BaseSalary.2012) |
    !is.na(BaseSalary.2013) | !is.na(BaseSalary.2014),
  UnderMin := FALSE
]
# ... and is switched to TRUE if any of those salaries is below 8.00.
everyone[
  BaseSalary.2011 < 8.00 | BaseSalary.2012 < 8.00 |
    BaseSalary.2013 < 8.00 | BaseSalary.2014 < 8.00,
  UnderMin := TRUE
]


# Fit the 2016-vs-2012 panel model for a given dataset.
#
# df: one row per employee, with columns employeekey, UnderMin, Voted16 and
#     Voted12.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
fit_mod_16 <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2016 rows first, then all 2012 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    underMin = rep(df$UnderMin, 2),
    voted = c(df$Voted16, df$Voted12),
    year = rep(c(16, 12), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}


# Presidential panel (2016 vs 2012).
# First: hourly employees with MaxSalary below 15.
everyone_under15 <- fit_mod_16(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone_under15_se <- sqrt(diag(vcov(everyone_under15)))

# Second: all hourly employees.
everyone_all <- fit_mod_16(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone_all_se <- sqrt(diag(vcov(everyone_all)))


# The 2014-vs-2010 models read the treatment flag from UnderMin14, which is a
# plain copy of UnderMin.
everyone[, UnderMin14 := UnderMin]

# Fit the 2014-vs-2010 panel model for a given dataset.
#
# df: one row per employee, with columns employeekey, UnderMin14, Voted14 and
#     Voted10.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
fit_mod_14 <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2014 rows first, then all 2010 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    underMin = rep(df$UnderMin14, 2),
    voted = c(df$Voted14, df$Voted10),
    year = rep(c(14, 10), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}
# Midterm panel (2014 vs 2010): all hourly employees, then hourly employees
# with MaxSalary below 15.
everyone14_all <- fit_mod_14(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone14_all_se <- sqrt(diag(vcov(everyone14_all)))

everyone14_under15 <- fit_mod_14(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone14_under15_se <- sqrt(diag(vcov(everyone14_under15)))

# The 2017-vs-2013 models read the treatment flag from UnderMin17, which is a
# plain copy of UnderMin.
everyone[, UnderMin17 := UnderMin]

# Fit the 2017-vs-2013 panel model for a given dataset.
#
# df: one row per employee, with columns employeekey, BaseSalary.2014,
#     UnderMin17, Voted17 and Voted13.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
fit_mod_17 <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2017 rows first, then all 2013 rows.
  # `salary` is carried along in the stacked data but does not appear in the
  # model formula.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    salary = rep(df$BaseSalary.2014, 2),
    underMin = rep(df$UnderMin17, 2),
    voted = c(df$Voted17, df$Voted13),
    year = rep(c(17, 13), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}

# Mayoral panel (2017 vs 2013): all hourly employees, then hourly employees
# with MaxSalary below 15. Each model's standard-error vector (square root of
# the diagonal of its vcov matrix) is computed exactly once.
everyone17_all <- fit_mod_17(everyone[(everyone$PayBasis == "per Hour"), ])
everyone17_all_se <- sqrt(diag(vcov(everyone17_all)))

everyone17_under15 <- fit_mod_17(everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ])
everyone17_under15_se <- sqrt(diag(vcov(everyone17_under15)))

# Estimate the 2017-vs-2013 panel model for one 2013 wage bin.
#
# cut_points: list with numeric elements `lower` and `upper`.
# Reads the global `everyone`. Rows with BaseSalary.2013 >= lower are kept,
# except those strictly between `upper` and 11; rows with
# BaseSalary.2013 < upper are flagged as underMin.
# NOTE(review): a wage exactly equal to `upper` is neither dropped nor flagged,
# so it ends up in the comparison group -- confirm this is intended.
# Returns the felm fit of voted ~ underMin * factor(year).
get_est <- function(cut_points) {
  lo <- cut_points$lower
  hi <- cut_points$upper

  wage13 <- everyone$BaseSalary.2013
  in_gap <- wage13 > hi & wage13 < 11
  keep_row <- wage13 >= lo & !in_gap
  df_new <- everyone[keep_row, ]

  df_new$UnderMin <- df_new$BaseSalary.2013 < hi

  # Stack the two elections: all 2017 rows first, then all 2013 rows.
  n_emp <- nrow(df_new)
  long <- data.frame(
    employee = rep(df_new$employeekey, 2),
    salary = rep(df_new$BaseSalary.2014, 2),
    underMin = rep(df_new$UnderMin, 2),
    voted = c(df_new$Voted17, df_new$Voted13),
    year = rep(c(17, 13), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}


# Count the rows of the global `everyone` whose BaseSalary.2013 falls in
# [lower, upper); missing salaries are not counted.
#
# cut_points: list with numeric elements `lower` and `upper`.
calc_under_min_size <- function(cut_points) {
  wage13 <- everyone$BaseSalary.2013
  in_bin <- wage13 >= cut_points$lower & wage13 < cut_points$upper
  sum(in_bin, na.rm = TRUE)
}

# Half-dollar wage bins starting at 7, 7.5, ..., 10.5; the first bin's lower
# edge is then raised to 7.25.
cuts <- lapply(
  seq(7, 10.5, by = .5),
  function(x) {
    list(lower = x, upper = x + .5)
  }
)
cuts[[1]]$lower <- 7.25

# Bin sizes and one model per bin; bins whose size equals 1 are dropped.
um_size <- lapply(cuts, calc_under_min_size)
mods <- lapply(cuts, get_est)
mods <- mods[um_size != 1]

# Fourth coefficient of each model and its standard error, with
# +/- 1.96 * SE bounds.
ests <- lapply(mods, function(x) {
  coef(x)[4]
})
ses <- lapply(mods, function(x) {
  sqrt(vcov(x)[4, 4])
})
ests <- data.frame(ests = unlist(ests), ses = unlist(ses))
ests$lower <- ests$ests - ests$ses * 1.96
ests$upper <- ests$ests + ests$ses * 1.96

# "lower-upper" labels for the retained bins, kept in bin order as factor levels.
ests$range <- unlist(lapply(cuts[um_size != 1], function(x) {
  paste0(x$lower, "-", x$upper)
}))
ests$range <- factor(ests$range, levels = ests$range)
ests$range

pdf("Figures/continuous_treatment_plot.pdf", height = 5, width = 7)
ggplot(ests, aes(x = range, ymin = lower, ymax = upper, y = ests)) +
  geom_errorbar() +
  geom_point() +
  theme_few() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  xlab("2013 Pay Group") +
  ylab("Change in Turnout Relative to Those\n Already Making Over $11 an Hour")
dev.off()


# Main difference-in-differences table: two columns each for the presidential
# (2016), midterm (2014) and mayoral (2017) panels, written to a .tex file.
stargazer(
  everyone_all, everyone_under15,
  everyone14_all, everyone14_under15,
  everyone17_all, everyone17_under15,
  # Exactly one SE vector per model, in the same order as the models above.
  # (The list previously held seven vectors, with everyone17_under15_se in the
  # fifth slot, so the two mayoral columns were paired with the wrong SEs.)
  se = list(
    everyone_all_se, everyone_under15_se,
    everyone14_all_se, everyone14_under15_se,
    everyone17_all_se, everyone17_under15_se
  ),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2016", "Under New MW * 2016", "2014", "Under New MW * 2014", "2017", "Under New MW * 2017"),
  column.labels = c("Voted in Presidential", "Voted in Midterm", "Voted in Mayoral"),
  column.separate = c(2, 2, 2),
  add.lines = list(c("Included Employees", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15")),
  notes = "$^{*}$p$<$0.05", notes.append = FALSE, omit.stat = c("f", "ser"),
  title = "Main Individual Difference-in-Differences Estimates",
  out = "Tables/everyone_mainests_allyrs.tex", label = "everyone_mainests", dep.var.labels.include = FALSE
)


# Figure 1: interaction estimates from the six main models with 95% intervals.
# Tidy every model into one stacked table (rows stay in model order).
mods <- do.call(
  rbind,
  lapply(
    list(
      everyone_all, everyone_under15,
      everyone14_all, everyone14_under15,
      everyone17_all, everyone17_under15
    ),
    tidy
  )
)
# Attach the SE vectors computed from vcov(), concatenated in the same model order.
mods$se <- c(
  everyone_all_se, everyone_under15_se,
  everyone14_all_se, everyone14_under15_se,
  everyone17_all_se, everyone17_under15_se
)
# Keep only the interaction terms (the ones whose name contains ":").
mods <- mods[grepl(":", mods$term), ]
mods$Election <- c(
  "Presidential (2012-2016)", "Presidential (2012-2016)",
  "Midterm (2010-2014)", "Midterm (2010-2014)",
  "Mayoral (2013-2017)", "Mayoral (2013-2017)"
)
mods$lower <- mods$estimate - 1.96 * mods$se
mods$upper <- mods$estimate + 1.96 * mods$se
mods$`Employees Included` <- factor(rep(c("All", "Under $15 Per Hour"), 3))


pdf("Figures/figure1.pdf", height = 3.5, width = 8)
ggplot(
  data = mods,
  aes(
    y = estimate, ymin = lower, ymax = upper, x = Election,
    shape = `Employees Included`, color = `Employees Included`
  )
) +
  geom_errorbar(position = position_dodge2(width = .5, reverse = TRUE), width = .5) +
  geom_point(size = 3, position = position_dodge2(width = .5, reverse = TRUE)) +
  geom_hline(alpha = .5, yintercept = 0, linetype = "dashed") +
  coord_flip() +
  theme_few() +
  ylab(expression(atop("Increase in Turnout From Minimum Wage Increase", paste("(Percentage Points)")))) +
  xlab("") +
  theme(axis.text.y = element_text(colour = "black"))
dev.off()


# Parallel-trends plots for everyone: mean turnout by UnderMin group and
# election year.
votingdata <- subset(
  everyone,
  select = c(
    "employeekey", "UnderMin",
    "Voted00", "Voted02", "Voted04", "Voted06", "Voted08",
    "Voted10", "Voted12", "Voted14", "Voted16"
  )
)
# Wide -> long: one row per employee x election year.
votinglong <- reshape(
  votingdata,
  varying = c("Voted00", "Voted02", "Voted04", "Voted06", "Voted08", "Voted10", "Voted12", "Voted14", "Voted16"),
  v.names = "Voted",
  timevar = "electionyr",
  times = c("00", "02", "04", "06", "08", "10", "12", "14", "16"),
  direction = "long"
)

# Legend labels (each contains a line break).
votinglong$UnderMin <- recode(
  as.character(votinglong$UnderMin),
  "TRUE" = "Below 2014 
Minimum Wage",
  "FALSE" = "Above 2014 
Minimum Wage"
)

head(votinglong)
dim(votingdata)
dim(votinglong)
votingmeans <- votinglong[, list(mean(Voted)), by = list(UnderMin, electionyr)]

# Quick look at every year, including rows with missing UnderMin.
votingmeans %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line()

# Panel 1: election years 02 through 12 (00, 14 and 16 are dropped).
p1 <- votingmeans %>%
  filter(!is.na(UnderMin)) %>%
  filter(!electionyr %in% c("00", "14", "16")) %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line() +
  expand_limits(y = 0) +
  expand_limits(y = .4) +
  ggtitle("All Elections") +
  ylab("Turnout") +
  theme_few() +
  theme(legend.title = element_blank()) +
  xlab("")


# Panel 2: only 04, 08 and 12 remain.
p2 <- votingmeans %>%
  filter(!is.na(UnderMin)) %>%
  filter(!electionyr %in% c("00", "02", "06", "10", "14", "16")) %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line() +
  expand_limits(y = 0) +
  expand_limits(y = .4) +
  ggtitle("Presidential Years Only") +
  ylab("Turnout") +
  theme_few() +
  theme(legend.title = element_blank()) +
  xlab("")


# Panel 3: only 02, 06 and 10 remain. The trailing xlab("") replaces the
# "Election Year" label set earlier in the chain.
p3 <- votingmeans %>%
  filter(!is.na(UnderMin)) %>%
  filter(!electionyr %in% c("00", "04", "08", "12", "14", "16")) %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line() +
  expand_limits(y = 0) +
  expand_limits(y = .4) +
  ggtitle("Midterm Years Only") +
  ylab("Turnout") +
  xlab("Election Year") +
  theme_few() +
  theme(legend.title = element_blank()) +
  xlab("")

# Draw once on the current device, then close it.
library(gridExtra)
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()

# Same three panels, written to PDF.
pdf("Figures/NYCparalleltrends.pdf", height = 4, width = 11)
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()



# Estimate the 2017-vs-2013 panel model for one 2013 wage bin.
# NOTE(review): an identical definition of get_est appears earlier in this
# file; this one simply redefines it.
#
# cut_points: list with numeric elements `lower` and `upper`.
# Reads the global `everyone`. Rows with BaseSalary.2013 >= lower are kept,
# except those strictly between `upper` and 11; rows with
# BaseSalary.2013 < upper are flagged as underMin.
# NOTE(review): a wage exactly equal to `upper` is neither dropped nor flagged,
# so it ends up in the comparison group -- confirm this is intended.
# Returns the felm fit of voted ~ underMin * factor(year).
get_est <- function(cut_points) {
  lo <- cut_points$lower
  hi <- cut_points$upper

  wage13 <- everyone$BaseSalary.2013
  in_gap <- wage13 > hi & wage13 < 11
  keep_row <- wage13 >= lo & !in_gap
  df_new <- everyone[keep_row, ]

  df_new$UnderMin <- df_new$BaseSalary.2013 < hi

  # Stack the two elections: all 2017 rows first, then all 2013 rows.
  n_emp <- nrow(df_new)
  long <- data.frame(
    employee = rep(df_new$employeekey, 2),
    salary = rep(df_new$BaseSalary.2014, 2),
    underMin = rep(df_new$UnderMin, 2),
    voted = c(df_new$Voted17, df_new$Voted13),
    year = rep(c(17, 13), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}


# Count the rows of the global `everyone` whose BaseSalary.2013 falls in
# [lower, upper); missing salaries are not counted.
# NOTE(review): an identical definition appears earlier in this file.
#
# cut_points: list with numeric elements `lower` and `upper`.
calc_under_min_size <- function(cut_points) {
  wage13 <- everyone$BaseSalary.2013
  in_bin <- wage13 >= cut_points$lower & wage13 < cut_points$upper
  sum(in_bin, na.rm = TRUE)
}

# NOTE(review): this section repeats the wage-bin estimation that appears
# earlier in this file and writes the same PDF path again.

# Half-dollar wage bins starting at 7, 7.5, ..., 10.5; the first bin's lower
# edge is then raised to 7.25.
cuts <- lapply(
  seq(7, 10.5, by = .5),
  function(x) {
    list(lower = x, upper = x + .5)
  }
)
cuts[[1]]$lower <- 7.25

# Bin sizes and one model per bin; bins whose size equals 1 are dropped.
um_size <- lapply(cuts, calc_under_min_size)

mods <- lapply(cuts, get_est)
mods <- mods[um_size != 1]

# Fourth coefficient of each model and its standard error, with
# +/- 1.96 * SE bounds.
ests <- lapply(mods, function(x) {
  coef(x)[4]
})
ses <- lapply(mods, function(x) {
  sqrt(vcov(x)[4, 4])
})
ests <- data.frame(ests = unlist(ests), ses = unlist(ses))
ests$lower <- ests$ests - ests$ses * 1.96
ests$upper <- ests$ests + ests$ses * 1.96

# "lower-upper" labels for the retained bins, kept in bin order as factor levels.
ests$range <- unlist(lapply(cuts[um_size != 1], function(x) {
  paste0(x$lower, "-", x$upper)
}))
ests$range <- factor(ests$range, levels = ests$range)
ests$range

pdf("Figures/continuous_treatment_plot.pdf", height = 5, width = 7)
ggplot(ests, aes(x = range, ymin = lower, ymax = upper, y = ests)) +
  geom_errorbar() +
  geom_point() +
  theme_few() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  xlab("2013 Pay Group") +
  ylab("Change in Turnout Relative to Those\n Already Making Over $11 an Hour")
dev.off()

# And look at it for people under $15 as well: the same parallel-trends plots,
# restricted to hourly employees with MaxSalary below 15.
votingdata_under15 <- subset(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ],
  select = c(
    "employeekey", "UnderMin",
    "Voted00", "Voted02", "Voted04", "Voted06", "Voted08",
    "Voted10", "Voted12", "Voted14", "Voted16"
  )
)
# Wide -> long: one row per employee x election year.
votinglong_under15 <- reshape(
  votingdata_under15,
  varying = c("Voted00", "Voted02", "Voted04", "Voted06", "Voted08", "Voted10", "Voted12", "Voted14", "Voted16"),
  v.names = "Voted",
  timevar = "electionyr",
  times = c("00", "02", "04", "06", "08", "10", "12", "14", "16"),
  direction = "long"
)
# Legend labels (each contains a line break).
votinglong_under15$UnderMin <- recode(
  as.character(votinglong_under15$UnderMin),
  "TRUE" = "Below 2014 
Minimum Wage",
  "FALSE" = "Above 2014 
Minimum Wage"
)

head(votinglong_under15)
dim(votingdata_under15)
dim(votinglong_under15)
votingmeans_under15 <- votinglong_under15[, list(mean(Voted)), by = list(UnderMin, electionyr)]

# Quick look at every year, including rows with missing UnderMin.
votingmeans_under15 %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line() +
  expand_limits(y = 0)

# Panel 1: drop post-treatment years (14, 16); 00 is dropped as well because,
# for some reason, there is no voting recorded for 2000.
p1.15 <- votingmeans_under15 %>%
  filter(!is.na(UnderMin)) %>%
  filter(!electionyr %in% c("00", "14", "16")) %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line() +
  ggtitle("All Elections") +
  expand_limits(y = 0) +
  expand_limits(y = .4) +
  ylab("Turnout") +
  theme_few() +
  theme(legend.title = element_blank()) +
  xlab("")

# Panel 2: only 04, 08 and 12 remain.
p2.15 <- votingmeans_under15 %>%
  filter(!is.na(UnderMin)) %>%
  filter(!electionyr %in% c("00", "02", "06", "10", "14", "16")) %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  ggtitle("Presidential Years Only") +
  geom_line() +
  expand_limits(y = 0) +
  expand_limits(y = .4) +
  ylab("Turnout") +
  theme_few() +
  theme(legend.title = element_blank()) +
  xlab("")

# Panel 3: only 02, 06 and 10 remain.
p3.15 <- votingmeans_under15 %>%
  filter(!is.na(UnderMin)) %>%
  filter(!electionyr %in% c("00", "04", "08", "12", "14", "16")) %>%
  ggplot(aes(x = electionyr, y = V1, color = UnderMin, group = UnderMin)) +
  geom_line() +
  expand_limits(y = 0) +
  expand_limits(y = .4) +
  ylab("Turnout") +
  ggtitle("Midterm Years Only") +
  theme_few() +
  theme(legend.title = element_blank()) +
  xlab("")

# Draw all six panels once on the current device, then close it.
grid.arrange(p1, p2, p3, p1.15, p2.15, p3.15, ncol = 3)
dev.off()

# PDF version: two columns, with as.table = FALSE.
pdf("Figures/NYCparalleltrends_under15.pdf", height = 8, width = 11)
grid.arrange(p1, p2, p3, p1.15, p2.15, p3.15, ncol = 2, as.table = FALSE)
dev.off()


# Add age to the model.
# This is to help address concerns about whether affected people are younger
# and so have different trends. Age enters as meanagein2016 rounded to a whole
# number.
everyone[, coarseage := round(meanagein2016)]
# Fit the 2016-vs-2012 panel model with a dummy for every rounded age.
#
# df: one row per employee, with columns employeekey, UnderMin, Voted16,
#     Voted12 and coarseage.
# Returns the felm fit of voted ~ underMin * factor(year) + factor(age);
# `employee` is passed in the fourth (cluster) part of the felm formula.
fit_mod_16_age <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2016 rows first, then all 2012 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    underMin = rep(df$UnderMin, 2),
    voted = c(df$Voted16, df$Voted12),
    age = rep(df$coarseage, 2),
    year = rep(c(16, 12), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) + factor(age) | 0 | 0 | employee, data = long)
}

# Age-adjusted presidential models: all hourly employees, then hourly
# employees with MaxSalary below 15.
everyone_age <- fit_mod_16_age(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone_age_se <- sqrt(diag(vcov(everyone_age)))

everyone_under15_age <- fit_mod_16_age(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone_under15_age_se <- sqrt(diag(vcov(everyone_under15_age)))

## And similarly, include age again: the 2014-vs-2010 panel model with a dummy
## for every rounded age.
#
# df: one row per employee, with columns employeekey, BaseSalary.2014,
#     UnderMin14, Voted14, Voted10 and coarseage.
# Returns the felm fit of voted ~ underMin * factor(year) + factor(age);
# `employee` is passed in the fourth (cluster) part of the felm formula.
fit_mod_14_age <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2014 rows first, then all 2010 rows.
  # `salary` is carried along but does not appear in the model formula.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    salary = rep(df$BaseSalary.2014, 2),
    underMin = rep(df$UnderMin14, 2),
    voted = c(df$Voted14, df$Voted10),
    age = rep(df$coarseage, 2),
    year = rep(c(14, 10), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) + factor(age) | 0 | 0 | employee, data = long)
}

# Age-adjusted midterm models: all hourly employees, then hourly employees
# with MaxSalary below 15.
everyone14_age <- fit_mod_14_age(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone14_age_se <- sqrt(diag(vcov(everyone14_age)))

everyone14_under15_age <- fit_mod_14_age(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone14_under15_age_se <- sqrt(diag(vcov(everyone14_under15_age)))

# Table of the four age-adjusted models; terms matching "age" are omitted from
# the printed rows and summarised by the omit.labels line instead.
stargazer(
  everyone_age, everyone_under15_age, everyone14_age, everyone14_under15_age,
  se = list(
    everyone_age_se, everyone_under15_age_se,
    everyone14_age_se, everyone14_under15_age_se
  ),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2016", "2014", "Under New MW * 2016", "Under New MW * 2014"),
  dep.var.labels = c("Voted"),
  add.lines = list(c("Included Employees", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15")),
  notes = "$^{*}$p$<$0.05", notes.append = FALSE, omit.stat = "f",
  omit = "age", omit.labels = "Age (year) dummies",
  title = "Individual Difference-in-Differences Estimates, Including Age",
  out = "Tables/everyone_mainestsplusage_bothyrs.tex", label = "everyone_mainests_plusage"
)


# Robustness sweep: refit the 2016-vs-2012 model while varying the highest
# hourly wage allowed into the sample (9.50 to 40 in steps of 0.50).
wagerates <- seq(from = 9.50, to = 40, by = .5)
cutoffresults <- as.data.frame(matrix(ncol = 6, nrow = length(wagerates)))
colnames(cutoffresults) <- c("cutoff", "est", "pval", "CIlower", "CIupper", "n")
for (i in seq_along(wagerates)) {
  cutoffresults$cutoff[i] <- cutoff <- wagerates[i]
  everyone_under <- fit_mod_16(everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < cutoff), ])

  # Summarise once per fit instead of once per extracted number.
  fit_summary <- summary(everyone_under)
  fit_ci <- confint(everyone_under)

  # Row 4 is the interaction term (the model has four coefficients). In the
  # coefficient table column 1 is the estimate and column 4 the p-value; in
  # the confint matrix columns 1 and 2 are the lower and upper bounds.
  cutoffresults$est[i] <- fit_summary$coefficients[4, 1]
  cutoffresults$pval[i] <- fit_summary$coefficients[4, 4]
  cutoffresults$CIlower[i] <- fit_ci[4, 1]
  cutoffresults$CIupper[i] <- fit_ci[4, 2]
  cutoffresults$n[i] <- fit_summary$df[2]
}

# Point estimates with their confidence intervals, by wage cutoff.
pdf("Figures/MainDiD_diffefrentwagecutoffs.pdf")
plot(
  cutoffresults$cutoff, cutoffresults$est,
  main = "Estimated Effect of MW Increase on 2016 Turnout, \n Different Inclusion Criteria for Comparison Group",
  ylab = "Estimated Effect on 2016 Turnout",
  xlab = "Highest Hourly Wage Included",
  ylim = c(0, .1)
)
segments(cutoffresults$cutoff, cutoffresults$CIlower, cutoffresults$cutoff, cutoffresults$CIupper, lwd = .5, col = "gray10")
points(cutoffresults$cutoff, cutoffresults$est, pch = 16)
abline(h = 0, col = "lightgray", lwd = 2, lty = 3)
dev.off()

## Back-in-time placebo: the same panel model on 2012 vs 2008.
#
# df: one row per employee, with columns employeekey, UnderMin, Voted12 and
#     Voted08.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
fit_mod_placebo <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2012 rows first, then all 2008 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    underMin = rep(df$UnderMin, 2),
    voted = c(df$Voted12, df$Voted08),
    year = rep(c(12, 8), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}

# Placebo models: all hourly employees, then hourly employees with MaxSalary
# below 15.
everyone_all_pl <- fit_mod_placebo(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone_all_pl_se <- sqrt(diag(vcov(everyone_all_pl)))


everyone_under15_pl <- fit_mod_placebo(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone_under15_pl_se <- sqrt(diag(vcov(everyone_under15_pl)))

# Quick console table with stargazer's defaults.
stargazer(everyone_all_pl, everyone_under15_pl)

# Variant with MaxSalary below 12. It is fitted here but does not appear in
# the table written below.
everyone_under12_pl <- fit_mod_placebo(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 12), ]
)
everyone_under12_pl_se <- sqrt(diag(vcov(everyone_under12_pl)))


# Placebo table written to file.
stargazer(
  everyone_all_pl, everyone_under15_pl,
  se = list(everyone_all_pl_se, everyone_under15_pl_se),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2012", "Under New MW * 2012"),
  dep.var.labels = c("Voted"),
  add.lines = list(c("Included Employees", "All Hourly", "Hourly Under \\$15")),
  notes = "$^{*}$p$<$0.05", notes.append = FALSE, omit.stat = "f",
  title = "Placebo Individual Difference-in-Differences Estimates",
  out = "Tables/everyone_placeboests_20082012.tex", label = "everyone_placeboests"
)

# Density of placebo t-statistics (est / se) by election, with a dashed
# vertical reference line per facet.
df <- fread("Data/placebo_cutpoints.csv")

# Reference value per year, hard-coded as ratios: .022/.003 is the default
# (it remains in place for 2016), then 2017 and 2014 are overridden.
df$result <- .022 / .003
df$result[df$year == 2017] <- .018 / .001
df$result[df$year == 2014] <- .025 / .002

# Replace the numeric year with the facet label for that election.
df$year[df$year == 2014] <- "Midterm (2010-2014)"
df$year[df$year == 2016] <- "Presidential (2012-2016)"
df$year[df$year == 2017] <- "Mayoral (2013-2017)"

pdf("Figures/placebocutpoints.pdf", height = 4, width = 7)
ggplot(df, aes(est / se)) +
  geom_density(bw = .5) +
  facet_wrap(~year) +
  geom_vline(aes(xintercept = result), linetype = 2, alpha = .5) +
  theme_minimal() +
  xlab("t-Statistic") +
  xlim(-15, 20) +
  theme_tufte()
dev.off()


### NOTE: This part requires running the code that generates the Idaho data first
# Load the Idaho intermediate file (the code below uses its `everyoneID` table).
load("Intermediate Files/IDfortriplediffs.RData")
head(everyoneID)

# Set this to either underminmain or underquint for comparison.
everyoneID[, UnderMin := underminmain]
#everyoneID[underquint==1, UnderMin:=TRUE];everyoneID[underquint==0, UnderMin:=FALSE]; everyoneID[, UnderMin:= as.logical(UnderMin)]

# Tag each table with its state, then stack the shared columns.
everyoneID[, state := "IDAHO"]
everyone[, state := "NEWYORK"]
cols <- c("employeekey", "state", "UnderMin", "Voted16", "Voted12", "Voted14", "Voted10", "MaxSalary")
bothstates <- rbind(
  subset(everyoneID, select = cols),
  subset(everyone, select = cols)
)
dim(bothstates)
dim(everyoneID)
dim(everyone)

# Fit the 2016-vs-2012 triple-difference model.
#
# df: one row per employee, with columns employeekey, state, UnderMin, Voted16
#     and Voted12.
# Returns the felm fit of voted ~ underMin * factor(year) * factor(state);
# `employee` is passed in the fourth (cluster) part of the felm formula.
fit_mod_16_ddd <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2016 rows first, then all 2012 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    state = rep(df$state, 2),
    underMin = rep(df$UnderMin, 2),
    voted = c(df$Voted16, df$Voted12),
    year = rep(c(16, 12), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) * factor(state) | 0 | 0 | employee, data = long)
}

# Triple-difference fits (2016 vs 2012): the full stacked sample, then rows
# with MaxSalary below 15.
everyone_all_td <- fit_mod_16_ddd(bothstates)
everyone_all_td_se <- sqrt(diag(vcov(everyone_all_td)))

everyone_under15_td <- fit_mod_16_ddd(bothstates[(bothstates$MaxSalary < 15), ])
everyone_under15_td_se <- sqrt(diag(vcov(everyone_under15_td)))

# Console-only table.
# NOTE(review): the second model/SE pair is everyone_under15 /
# everyone_under15_se rather than the *_td objects fitted just above --
# confirm which was intended.
stargazer(
  list(everyone_all_td, everyone_under15),
  se = list(everyone_all_td_se, everyone_under15_se),
  star.cutoffs = .05
)

# Difference-in-differences and triple-difference models side by side,
# written to file. Now go make the same one for 10-14.
stargazer(
  everyone_all, everyone_all_td, everyone_under15, everyone_under15_td,
  se = list(everyone_all_se, everyone_all_td_se, everyone_under15_se, everyone_under15_td_se),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2016", "New York City", "Under New MW * 2016", "Under New MW * NYC", "2016 * NYC", "Under New MW * 2016 * NYC"),
  dep.var.labels = c("Voted"),
  add.lines = list(c("Included Employees", "All Hourly", "All Hourly", "Hourly Under \\$15", "Hourly Under \\$15")),
  notes = "$^{*}$p$<$0.05", notes.append = FALSE, omit.stat = "f",
  title = "New York vs. Idaho, 2012-2016",
  out = "Tables/NYCID_triplediffs_1216.tex", label = "Idahotriplediffs1216"
)


# Fit the 2014-vs-2010 triple-difference model.
#
# df: one row per employee, with columns employeekey, state, UnderMin, Voted14
#     and Voted10.
# Returns the felm fit of voted ~ underMin * factor(year) * factor(state);
# `employee` is passed in the fourth (cluster) part of the felm formula.
fit_mod_14_ddd <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2014 rows first, then all 2010 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    state = rep(df$state, 2),
    underMin = rep(df$UnderMin, 2),
    voted = c(df$Voted14, df$Voted10),
    year = rep(c(14, 10), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) * factor(state) | 0 | 0 | employee, data = long)
}

# Triple-difference fits (2014 vs 2010): the full stacked sample, then rows
# with MaxSalary below 15.
everyone14_all_td <- fit_mod_14_ddd(bothstates)
everyone14_all_td_se <- sqrt(diag(vcov(everyone14_all_td)))

everyone14_under15_td <- fit_mod_14_ddd(bothstates[(bothstates$MaxSalary < 15), ])
everyone14_under15_td_se <- sqrt(diag(vcov(everyone14_under15_td)))



# Both triple-difference panels in one table, written to file.
stargazer(
  everyone_all_td, everyone_under15_td, everyone14_all_td, everyone14_under15_td,
  se = list(
    everyone_all_td_se, everyone_under15_td_se,
    everyone14_all_td_se, everyone14_under15_td_se
  ),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2016", "2014", "New York City", "Under New MW * 2016", "Under New MW * 2014", "Under New MW * NYC", "2016 * NYC", "Under New MW * 2016 * NYC", "2014 * NYC", "Under New MW * 2014 * NYC"),
  dep.var.labels = c("Voted"),
  add.lines = list(c("Included Employees", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15")),
  title = "New York vs. Idaho (Triple-Differences)",
  out = "Tables/NYCID_triplediffs_bothyrs.tex", label = "Idahotriplediffsbothyrs"
)



# Rerun the triple-differences with the Idaho treatment flag taken from
# `underquint` instead of `underminmain`.
load("Intermediate Files/IDfortriplediffs.RData")
head(everyoneID)

# Set this to either underminmain or underquint for comparison.
everyoneID[, UnderMin := underquint]
#everyoneID[underquint==1, UnderMin:=TRUE];everyoneID[underquint==0, UnderMin:=FALSE]; everyoneID[, UnderMin:= as.logical(UnderMin)]

# Tag each table with its state, then stack the shared columns.
everyoneID[, state := "IDAHO"]
everyone[, state := "NEWYORK"]
cols <- c("employeekey", "state", "UnderMin", "Voted16", "Voted12", "Voted14", "Voted10", "MaxSalary")
bothstates <- rbind(
  subset(everyoneID, select = cols),
  subset(everyone, select = cols)
)
dim(bothstates)
dim(everyoneID)
dim(everyone)


# 2016 vs 2012: full stacked sample, then rows with MaxSalary below 15.
everyone_all_td <- fit_mod_16_ddd(bothstates)
everyone_all_td_se <- sqrt(diag(vcov(everyone_all_td)))

everyone_under15_td <- fit_mod_16_ddd(bothstates[(bothstates$MaxSalary < 15), ])
everyone_under15_td_se <- sqrt(diag(vcov(everyone_under15_td)))


# 2014 vs 2010: full stacked sample, then rows with MaxSalary below 15.
everyone14_all_td <- fit_mod_14_ddd(bothstates)
everyone14_all_td_se <- sqrt(diag(vcov(everyone14_all_td)))

everyone14_under15_td <- fit_mod_14_ddd(bothstates[(bothstates$MaxSalary < 15), ])
everyone14_under15_td_se <- sqrt(diag(vcov(everyone14_under15_td)))

stargazer(
  everyone_all_td, everyone_under15_td, everyone14_all_td, everyone14_under15_td,
  se = list(
    everyone_all_td_se, everyone_under15_td_se,
    everyone14_all_td_se, everyone14_under15_td_se
  ),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2016", "2014", "New York City", "Under New MW * 2016", "Under New MW * 2014", "Under New MW * NYC", "2016 * NYC", "Under New MW * 2016 * NYC", "2014 * NYC", "Under New MW * 2014 * NYC"),
  dep.var.labels = c("Voted"),
  add.lines = list(c("Included Employees", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15")),
  notes = "$^{*}$p$<$0.05", notes.append = FALSE, omit.stat = "f",
  title = "New York vs. Idaho (Triple-Differences), Bottom Pay Quintile ``Affected''",
  out = "Tables/NYCID_triplediffs_bothyrs_quintile.tex", label = "Idahotriplediffsbothyrs_quint"
)



## Now run the version with the FastLink merged dataset.
## From here on `everyone` refers to this file.
everyone <- fread("Data/merged_fastlink.csv")


# Lowest base salary across 2011-2014 within each employee x pay-basis group,
# ignoring missing values.
everyone[, minsalary := min(
  BaseSalary.2011, BaseSalary.2012, BaseSalary.2013, BaseSalary.2014,
  na.rm = TRUE
), by = list(employeekey, PayBasis)]

# We only want people that earned above the minimum wage: drop groups whose
# lowest salary is below 7.25, and groups whose minimum is infinite.
everyone <- everyone[!(minsalary < 7.25) & !is.infinite(minsalary), ]
dim(everyone)

# Highest base salary across 2011-2014, same grouping.
everyone[, MaxSalary := max(
  BaseSalary.2011, BaseSalary.2012, BaseSalary.2013, BaseSalary.2014,
  na.rm = TRUE
), by = list(employeekey, PayBasis)]

## add the undermin variable
# Everyone working in this period (at least one observed salary) starts as FALSE ...
everyone[
  !is.na(BaseSalary.2011) | !is.na(BaseSalary.2012) |
    !is.na(BaseSalary.2013) | !is.na(BaseSalary.2014),
  UnderMin := FALSE
]
# ... and is switched to TRUE if any of those salaries is below 8.00.
everyone[
  BaseSalary.2011 < 8.00 | BaseSalary.2012 < 8.00 |
    BaseSalary.2013 < 8.00 | BaseSalary.2014 < 8.00,
  UnderMin := TRUE
]


setkey(everyone, employeekey)

# Fit the 2016-vs-2012 panel model for a given dataset (this redefinition
# reads the lower-case voted16 / voted12 columns).
#
# df: one row per employee, with columns employeekey, UnderMin, voted16 and
#     voted12.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
fit_mod_16 <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2016 rows first, then all 2012 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    underMin = rep(df$UnderMin, 2),
    voted = c(df$voted16, df$voted12),
    year = rep(c(16, 12), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}

# Presidential panel (2016 vs 2012) on the FastLink data.
# First: hourly employees with MaxSalary below 15.
everyone_under15 <- fit_mod_16(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone_under15_se <- sqrt(diag(vcov(everyone_under15)))

# Second: all hourly employees.
everyone_all <- fit_mod_16(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone_all_se <- sqrt(diag(vcov(everyone_all)))


# The 2014-vs-2010 models read the treatment flag from UnderMin14, which is a
# plain copy of UnderMin.
everyone[, UnderMin14 := UnderMin]

# Fit the 2014-vs-2010 panel model for a given dataset (this redefinition
# reads the lower-case voted14 / voted10 columns).
#
# df: one row per employee, with columns employeekey, UnderMin14, voted14 and
#     voted10.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
fit_mod_14 <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2014 rows first, then all 2010 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    underMin = rep(df$UnderMin14, 2),
    voted = c(df$voted14, df$voted10),
    year = rep(c(14, 10), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}

# Midterm panel (2014 vs 2010) on the FastLink data: all hourly employees,
# then hourly employees with MaxSalary below 15.
everyone14_all <- fit_mod_14(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone14_all_se <- sqrt(diag(vcov(everyone14_all)))

everyone14_under15 <- fit_mod_14(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone14_under15_se <- sqrt(diag(vcov(everyone14_under15)))

# Using the same treatment flag for now: UnderMin17 is a plain copy of UnderMin.
everyone[, UnderMin17 := UnderMin]

# Fit the 2017-vs-2013 panel model for a given dataset (this redefinition
# reads the lower-case voted17 / voted13 columns).
#
# df: one row per employee, with columns employeekey, BaseSalary.2014,
#     posterior, UnderMin17, voted17 and voted13.
# Returns the felm fit of voted ~ underMin * factor(year); `employee` is passed
# in the fourth (cluster) part of the felm formula.
# NOTE(review): the `weights` column built from `posterior` (and `salary`) is
# stored in the stacked data but never handed to felm -- confirm whether a
# weighted fit was intended.
fit_mod_17 <- function(df) {
  n_emp <- nrow(df)

  # Stack the two elections: all 2017 rows first, then all 2013 rows.
  long <- data.frame(
    employee = rep(df$employeekey, 2),
    salary = rep(df$BaseSalary.2014, 2),
    weights = rep(df$posterior, 2),
    underMin = rep(df$UnderMin17, 2),
    voted = c(df$voted17, df$voted13),
    year = rep(c(17, 13), each = n_emp)
  )

  felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
}

# Mayoral panel (2017 vs 2013) on the FastLink data: all hourly employees,
# then hourly employees with MaxSalary below 15.
everyone17_all <- fit_mod_17(
  everyone[(everyone$PayBasis == "per Hour"), ]
)
everyone17_all_se <- sqrt(diag(vcov(everyone17_all)))

everyone17_under15 <- fit_mod_17(
  everyone[(everyone$PayBasis == "per Hour" & everyone$MaxSalary < 15), ]
)
everyone17_under15_se <- sqrt(diag(vcov(everyone17_under15)))


# FastLink robustness version of the main difference-in-differences table:
# two columns each for the presidential (2016), midterm (2014) and mayoral
# (2017) panels.
stargazer::stargazer(
  everyone_all, everyone_under15,
  everyone14_all, everyone14_under15,
  everyone17_all, everyone17_under15,
  # Exactly one SE vector per model, in the same order as the models above.
  # (The list previously held seven vectors, with everyone17_under15_se in the
  # fifth slot, so the two mayoral columns were paired with the wrong SEs.)
  se = list(
    everyone_all_se, everyone_under15_se,
    everyone14_all_se, everyone14_under15_se,
    everyone17_all_se, everyone17_under15_se
  ),
  star.cutoffs = .05,
  covariate.labels = c("Under New Min. Wage", "2016", "Under New MW * 2016", "2014", "Under New MW * 2014", "2017", "Under New MW * 2017"),
  column.labels = c("voted in Presidential", "voted in Midterm", "voted in Mayoral"),
  column.separate = c(2, 2, 2),
  add.lines = list(c("Included Employees", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15", "All Hourly", "Hourly Under \\$15")),
  notes = "$^{*}$p$<$0.05", notes.append = FALSE, omit.stat = c("f", "ser"),
  title = "Main Individual Difference-in-Differences Estimates",
  out = "Tables/everyone_mainests_bothyrs_fastlinkcheck",
  label = "everyone_mainests_fastlink", dep.var.labels.include = FALSE
)

